/**
* Copyright (c) 2009-2012, Regents of the University of Colorado
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package com.googlecode.clearnlp.constituent;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;

import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntOpenHashSet;
import com.googlecode.clearnlp.conversion.C2DInfo;
import com.googlecode.clearnlp.morphology.MPLib;
import com.googlecode.clearnlp.propbank.PBLoc;
import com.googlecode.clearnlp.util.pair.StringIntPair;


/**
 * Constituent node.
 * @since 1.0.0
 * @author Jinho D. Choi ({@code choijd@colorado.edu})
 */
public class CTNode implements Comparable<CTNode>
{
	/** The phrase or pos tag of this node. */
	public String      pTag;
	/** The function tags of this node (default: empty). */
	protected Set<String> s_fTags;
	/** The co-index of this node (default: {@code -1}). */
	public int coIndex  = -1;
	/** The gap-index of this node (default: {@code -1}). */
	public int gapIndex = -1;
	/** The word-form of this node (default: {@code null}). */
	public String form  = null;
	
	/** The parent of this node (default: {@code null}). */
	protected CTNode parent     = null;
	/** The antecedent of this node (default: {@code null}). */
	protected CTNode antecedent = null;
	/** The children of this node. */
	protected List<CTNode> ls_children;
	/** The ID (starting at 0) of this node among other terminal nodes within the tree (default: {@code -1}). */
	protected int i_terminalId = -1;
	/** The ID (starting at 0) of this node among other terminal nodes (disregarding empty categories) within the tree (default: {@code -1}). */
	protected int i_tokenId    = -1;
	/** The ID (starting at 0) of this node among its siblings (default: {@code -1}). */
	protected int i_siblingId  = -1;
	/** The PropBank location of this node (default: {@code null}). */
	protected PBLoc  pb_loc    = null;
	
	/** The information of constituent-to-dependency conversion. */
	public C2DInfo c2d = null;
	/** The list of PropBank predicate ID and label pairs. */
	public List<StringIntPair> pbArgs = null;
	
	/**
     * Constructs a constituent node.
     * @param tags {@link CTNode#pTag}{@code (-}{@link CTNode#s_fTags}{@code )*(-}{@link CTNode#coIndex}{@code ){0,1}(=}{@link CTNode#gapIndex}{@code ){0,1}}.
     */
	public CTNode(String tags)
	{
		setTags(tags);
		ls_children = new ArrayList<CTNode>();
	}
	
	/**
     * Constructs a constituent node with the specific word-form.
     * @param tags see the {@code tags} parameter in {@link CTNode#CTNode(String, CTNode)}.
     * @param form the word-form of this node.
     */
	public CTNode(String tags, String form)
	{
		this(tags);
		this.form = form;
	}
	
//	======================== Getters ========================

	/**
     * Returns all tags of this node in the Penn Treebank format.
     * See the {@code tags} parameter in {@link CTNode#CTNode(String, CTNode)}.
     * @return all tags of this node in the Penn Treebank format.
     */
	public String getTags()
	{
		StringBuilder build = new StringBuilder();
		
		build.append(pTag);
		
		for (String fTag : s_fTags)
		{
			build.append("-");
			build.append(fTag);
		}
		
		if (coIndex != -1)
		{
			build.append("-");
			build.append(coIndex);
		}
		
		if (gapIndex != -1)
		{
			build.append("=");
			build.append(gapIndex);
		}
		
		return build.toString();
	}
	
	/**
     * Returns the set of function tags of this node.
     * @return the set of function tags of this node.
     */
	public Set<String> getFTags()
	{
		return s_fTags;
	}
	
	/**
     * Returns the ID (starting at 0) of this node among other terminal nodes within the tree (default: {@code -1}).
     * @return the ID (starting at 0) of this node among other terminal nodes within the tree (default: {@code -1}).
     */
	public int getTerminalId()
	{
		return i_terminalId;
	}
	
	/**
     * Returns the ID (starting at 0) of this node among other terminal nodes (disregarding empty categories) within the tree (default: {@code -1}).
     * @return the ID (starting at 0) of this node among other terminal nodes (disregarding empty categories) within the tree (default: {@code -1}).
     */
	public int getTokenId()
	{
		return i_tokenId;
	}
	
	/**
     * Returns the ID (starting at 0) of this node among its siblings (default: {@code -1}).
     * @return the ID (starting at 0) of this node among its siblings (default: {@code -1}).
     */
	public int getSiblingId()
	{
		return i_siblingId;
	}
	
	/**
     * Returns the PropBank location of this node (default: {@code null}).
     * @return the PropBank location of this node (default: {@code null}).
     */
	public PBLoc getPBLoc()
	{
		return pb_loc;
	}
	
	/**
     * Returns the parent of this node.
     * @return the parent of this node.
     */
	public CTNode getParent()
	{
		return parent;
	}
	
	/**
     * Returns the antecedent of this node (default: {@code null}).
     * @return the antecedent of this node (default: {@code null}).
     */
	public CTNode getAntecedent()
	{
		return antecedent;
	}
	
	/**
     * Returns a list of all children of this node.
     * @return a list of all children of this node.
     */
	public List<CTNode> getChildren()
	{
		return ls_children;
	}
	
	/**
     * Returns an immutable list of sub-children of this node.
     * The sublist begins at the specific position and extends to the end.
     * @param fstId the ID of the first child (inclusive).
     * @return an immutable list of sub-children of this node.
     * @throws IndexOutOfBoundsException for an illegal ID.
     */
	public List<CTNode> getChildren(int fstId)
	{
		return ls_children.subList(fstId, ls_children.size());
	}
	
	/**
	 * Returns an immutable list of sub-children of this node.
     * The sublist begins and ends at the specific positions.
     * @param fstId the ID of the first child (inclusive).
     * @param lstId the ID of the last child (exclusive)
     * @return an immutable list of sub-children of this node.
     * @throws IndexOutOfBoundsException for an illegal ID.
     */
	public List<CTNode> getChildren(int fstId, int lstId)
	{
		return ls_children.subList(fstId, lstId);
	}
	
	public CTNode getChild(int childId)
	{
		return (0 <= childId && childId < ls_children.size()) ? ls_children.get(childId) : null;
	}
	
	public CTNode getFirstChild(String... tags)
	{
		for (CTNode child : ls_children)
		{
			if (child.isTag(tags))
				return child;
		}
		
		return null;
	}
	
	public CTNode getLastChild(String... tags)
	{
		CTNode child;	int i;
		
		for (i=ls_children.size()-1; i>=0; i--)
		{
			child = ls_children.get(i);
			
			if (child.isTag(tags))
				return child;
		}
		
		return null;
	}
	
	public List<CTNode> getAllChildren(String... tags)
	{
		List<CTNode> list = new ArrayList<CTNode>();
		
		for (CTNode child : ls_children)
		{
			if (child.isTag(tags))
				list.add(child);
		}
		
		return list;
	}
	
	public CTNode getFirstDescendant(String... tags)
	{
		return getFirstDescendantAux(ls_children, tags);
	}
	
	private CTNode getFirstDescendantAux(List<CTNode> nodes, String... tags)
	{
		CTNode desc;
		
		for (CTNode node : nodes)
		{
			if (node.isTag(tags))	return node;
			
			desc = getFirstDescendantAux(node.ls_children, tags);
			if (desc != null)	return desc;
		}
		
		return null;
	}

	public CTNode getPrevSibling()
	{
		List<CTNode> siblings = parent.ls_children;
		return (0 <= i_siblingId-1) ? siblings.get(i_siblingId-1) : null;
	}
	
	public CTNode getPrevSibling(String... tags)
	{
		if (parent == null) return null;
		List<CTNode> siblings = parent.ls_children;
		CTNode node; int i;
		
		for (i=i_siblingId-1; i>=0; i--)
		{
			node = siblings.get(i);
			
			if (node.isTag(tags))
				return node;
		}
		
		return null;
	}
	
	public List<CTNode> getPrevSiblings()
	{
		return (parent != null) ? parent.getChildren(0, this.i_siblingId) : new ArrayList<CTNode>(0);
	}
	
	public CTNode getNextSibling()
	{
		List<CTNode> siblings = parent.ls_children;
		return (i_siblingId+1 < siblings.size()) ? siblings.get(i_siblingId+1) : null;
	}
	
	public CTNode getNextSibling(String... tags)
	{
		if (parent == null) return null;
		List<CTNode> siblings = parent.ls_children;
		CTNode node; int i, size = siblings.size();
		
		for (i=i_siblingId+1; i<size; i++)
		{
			node = siblings.get(i);
			
			if (node.isTag(tags))
				return node;
		}
		
		return null;
	}
	
	public CTNode getNearestAncestor(String... tags)
	{
		CTNode curr = this;
		
		while (curr.parent != null)
		{
			curr = curr.parent;
			if (curr.isTag(tags)) return curr;
		}
		
		return null;
	}
	
	public CTNode getFirstChainedDescendant(String... tags)
	{
		CTNode desc = this, child;
		
		while ((child = desc.getFirstChild(tags)) != null)
			desc = child;
		
		return (desc != this) ? desc : null;
	}
	
	public CTNode getHighestChainedAncestor(String... tags)
	{
		CTNode curr = this;
		
		while (curr.parent != null && curr.parent.isTag(tags))
			curr = curr.parent;
	
		return (curr == this) ? null : curr;
	}
	
	public List<CTNode> getIncludedEmptyCategory(String regex)
	{
		List<CTNode> list = new ArrayList<CTNode>();
		
		getIncludedEmptyCategoriesAux(this, list, regex);
		return list;
	}
	
	private void getIncludedEmptyCategoriesAux(CTNode curr, List<CTNode> list, String regex)
	{
		if (curr.isEmptyCategory() && curr.form.matches(regex))
			list.add(curr);
		
		for (CTNode child : curr.ls_children)
			getIncludedEmptyCategoriesAux(child, list, regex);
	}
	
	public List<CTNode> getSubTerminals()
	{
		List<CTNode> terminals = new ArrayList<CTNode>();
		
		getSubTerminals(this, terminals);
		return terminals;
	}
	
	private void getSubTerminals(CTNode curr, List<CTNode> terminals)
	{
		if (curr.isPhrase())
		{
			for (CTNode child : curr.ls_children)
				getSubTerminals(child, terminals);
		}
		else
			terminals.add(curr);
	}
	
	public IntArrayList getSubTerminalIdList()
	{
		IntArrayList list = new IntArrayList();
		
		for (CTNode node : getSubTerminals())
			list.add(node.getTerminalId());
		
		return list;
	}
	
	public IntOpenHashSet getSubTerminalIdSet()
	{
		IntOpenHashSet set = new IntOpenHashSet();
		
		for (CTNode node : getSubTerminals())
			set.add(node.getTerminalId());
		
		return set;
	}
	
	public List<CTNode> getSubTokens()
	{
		List<CTNode> tokens = new ArrayList<CTNode>();
		
		getSubTokens(this, tokens);
		return tokens;
	}
	
	private void getSubTokens(CTNode curr, List<CTNode> tokens)
	{
		if (curr.isPhrase())
		{
			for (CTNode child : curr.ls_children)
				getSubTokens(child, tokens);
		}
		else if (!curr.isEmptyCategory())
			tokens.add(curr);
	}
	
	public CTNode getFirstTerminal()
	{
		return getFirstTerminalAux(this);
	}
	
	private CTNode getFirstTerminalAux(CTNode node)
	{
		List<CTNode> children = node.getChildren();
		if (children.isEmpty())	return node;
		
		return getFirstTerminalAux(children.get(0));
	}
	
	public int getChildrenSize()
	{
		return ls_children.size();
	}
	
	public CTNode getLowestCommonAncestor(CTNode node)
	{
		if (this.isDescendantOf(node))	return node;
		if (node.isDescendantOf(this))	return this;
		
		CTNode parent = getParent();
		
		while (parent != null)
		{
			if (node.isDescendantOf(parent))
				return parent;
			
			parent = parent.getParent();
		}
		
		return null;
	}
	
//	======================== Setters ========================
	
	public void setTags(String tags)
	{
		s_fTags = new TreeSet<String>();
		
		if (tags.charAt(0) == '-')
		{
			pTag = tags;
			return;
		}
		
		StringTokenizer tok = new StringTokenizer(tags, "-=", true);
		String delim, tag;
		
		pTag = tok.nextToken();
		
		while (tok.hasMoreTokens())
		{
			delim = tok.nextToken();
			if (!tok.hasMoreTokens())
			{
				System.err.println("Error: illegal tag \""+tags+"\"");
				break;
			}
			tag = tok.nextToken();
			
			if (delim.equals("-"))
			{
				if (MPLib.containsOnlyDigits(tag))
				{
					if (coIndex == -1)
						coIndex = Integer.parseInt(tag);
					else
						gapIndex = Integer.parseInt(tag);
				}
				else
					s_fTags.add(tag);
			}
			else // if (delim.equals("="))
				gapIndex = Integer.parseInt(tag);
		}
	}
	
	public void addFTag(String fTag)
	{
		s_fTags.add(fTag);
	}
	
	public void addFTags(Collection<String> fTags)
	{
		s_fTags.addAll(fTags);
	}
	
	public void removeFTag(String fTag)
	{
		s_fTags.remove(fTag);
	}
	
	public void removeFTagAll()
	{
		s_fTags.clear();
	}
	
	public void setAntecedent(CTNode ante)
	{
		antecedent = ante;
	}
	
	public void addChild(CTNode child)
	{
		child.parent = this;
		child.i_siblingId = ls_children.size();
		
		ls_children.add(child);
	}
	
	public void addChild(int index, CTNode child)
	{
		ls_children.add(index, child);
		
		child.parent = this;
		resetSiblingIDs(index);
	}
	
	public void setChild(int index, CTNode child)
	{
		ls_children.set(index, child).parent = null;
		
		child.parent = this;
		child.i_siblingId = index;
	}
	
	public void removeChild(int index)
	{
		if (!isChildrenRange(index))
			throw new IndexOutOfBoundsException(Integer.toString(index));
		
		ls_children.remove(index).parent = null;
		resetSiblingIDs(index);
	}
	
	private void resetSiblingIDs(int index)
	{
		int i, size = ls_children.size();
		
		for (i=index; i<size; i++)
			ls_children.get(i).i_siblingId = i;
	}
	
	public void removeChild(CTNode child)
	{
		removeChild(ls_children.indexOf(child));
	}

	public void resetChildren(Collection<CTNode> children)
	{
		ls_children.clear();
		
		for (CTNode child : children)
			addChild(child);
	}
	
//	======================== Booleans ========================
	
	public boolean isPTag(String pTag)
	{
		return this.pTag.equals(pTag);
	}
	
	public boolean isPTagAny(String... pTags)
	{
		for (String pTag : pTags)
		{
			if (isPTag(pTag))
				return true;
		}
		
		return false;
	}
	
	public boolean matchesPTag(String regex)
	{
		return pTag.matches("^"+regex+"$");
	}
	
	public boolean isFTag(String fTag)
	{
		return (s_fTags.size() == 1) && (s_fTags.contains(fTag)); 
	}
	
	public boolean hasFTag(String fTag)
	{
		return s_fTags.contains(fTag);
	}
	
	public boolean hasFTagAll(Collection<String> fTags)
	{
		return this.s_fTags.containsAll(fTags);
	}
	
	public boolean hasFTagAny(Collection<String> fTags)
	{
		for (String fTag : fTags)
		{
			if (this.s_fTags.contains(fTag))
				return true;
		}
		
		return false;
	}
	
	public boolean hasFTagAny(String... fTags)
	{
		for (String fTag : fTags)
		{
			if (this.s_fTags.contains(fTag))
				return true;
		}
		
		return false;
	}
	
	public boolean isTag(String... tags)
	{
		String      pTag  = null;
		String      pRex  = null;
		Set<String> fTags = new HashSet<String>();
		
		for (String tag : tags)
		{
			if (tag.equals(CTLib.POS_NONE) || tag.equals(CTLibEn.POS_LRB) || tag.equals(CTLibEn.POS_RRB))
				pTag = tag;
			else
			{
				switch (tag.charAt(0))
				{
				case '-': fTags.add(tag.substring(1));	break;
				case '+': pRex = tag.substring(1);		break;
				default : pTag = tag;
				}				
			}
		}
		
		return (pTag == null || isPTag (pTag)) &&
		       (pRex == null || matchesPTag(pRex)) &&
		       hasFTagAll(fTags);
	}
	
	public boolean isForm(String form)
	{
		return this.form != null && this.form.equals(form);
	}
	
	public boolean isPhrase()
	{
		return !ls_children.isEmpty();
	}
	
	public boolean isEmptyCategory()
	{
		return pTag.equals(CTLib.POS_NONE);
	}
	
	public boolean isEmptyCategoryRec()
	{
		CTNode curr = this;
		
		while (curr.isPhrase())
		{
			if (curr.ls_children.size() > 1)
				return false;
			else
				curr = curr.getChild(0);
		}
		
		return curr.isEmptyCategory();
	}
	
	public boolean isDescendantOf(CTNode node)
	{
		CTNode parent = getParent();

		while (parent != null)
		{
			if (parent == node)
				return true;
			
			parent = parent.getParent();
		}
		
		return false;
	}
	
	public boolean containsTags(String... tags)
	{
		for (CTNode child : ls_children)
		{
			if (child.isTag(tags))
				return true;
		}
		
		return false;
	}
	
	/**
	 * 
	 *
	 * @param index 
	 * @return 
	 */
	public boolean isChildrenRange(int index)
	{
		return 0 <= index && index < ls_children.size();
	}

	
//	======================== Strings ========================

	public String toForms(boolean includeNulls, String delim)
	{
		StringBuilder build = new StringBuilder();
		
		for (CTNode node : getSubTerminals())
		{
			if (includeNulls || !node.isEmptyCategory())
			{
				build.append(delim);
				build.append(node.form);
			}
		}
		
		return build.length() == 0 ? "" : build.substring(delim.length());
	}
	
	/* (non-Javadoc)
	 * @see java.lang.Object#toString()
	 */
	public String toString()
	{
		return toString(false, false);
	}
	
	public String toString(boolean... args)
	{
		boolean includeLineNumbers  = (args.length > 0) ? args[0] : false;
		boolean includeAntePointers = (args.length > 1) ? args[1] : false;
		
		ArrayList<String> lTree = new ArrayList<String>();
		toStringAux(this, lTree, "", includeAntePointers);
		
		StringBuilder build = new StringBuilder();
		
		for (int i=0; i<lTree.size(); i++)
		{
			if (includeLineNumbers)
				build.append(String.format("%3d: %s\n", i, lTree.get(i)));
			else
				build.append(lTree.get(i)+"\n");
		}
			
		build.deleteCharAt(build.length()-1);	// remove the last '\n'
		return build.toString();
	}
	
	public String toStringLine()
	{
		ArrayList<String> lTree = new ArrayList<String>();
		toStringAux(this, lTree, "", false);
		
		StringBuilder build = new StringBuilder();
		
		for (int i=0; i<lTree.size(); i++)
			build.append(lTree.get(i).trim()+" ");
			
		build.deleteCharAt(build.length()-1);	// remove the last ' '
		return build.toString();
	}
	
	private void toStringAux(CTNode curr, ArrayList<String> lTree, String sTags, boolean includeAntePointers)
	{
		if (curr.isPhrase())
		{
			sTags += "("+curr.getTags()+" ";
		//	sTags += "("+curr.getTags()+"-"+curr.pbLoc+" ";
			
			for (CTNode child : curr.ls_children)
			{
				toStringAux(child, lTree, sTags, includeAntePointers);
				
				if (child.i_siblingId == 0)
					sTags = sTags.replaceAll(".", " ");		// indent
			}

			int last = lTree.size() - 1;
			lTree.set(last, lTree.get(last)+")");
		}
		else
		{
			String tag = sTags+"("+curr.getTags()+" "+curr.form+")";
			if (includeAntePointers && curr.antecedent != null)
				tag += "["+curr.antecedent.getTags()+"]"; 
			lTree.add(tag);
		}
	}
	
	@Override
	public int compareTo(CTNode node)
	{
		return i_terminalId - node.i_terminalId;
	}
}
